Gaze fingerprint analysis

Load libraries and other custom code.

# ---- Libraries, paths, and analysis constants ----
# NOTE: the package list order is preserved deliberately (later packages can
# mask earlier ones, e.g. MASS::select vs dplyr::select).
library(easypackages)
libraries("here","ggplot2","ggridges","reshape2","patchwork","RColorBrewer",
          "plotrix","tidyverse","cluster","ggpackets","wordcloud","proxy",
          "ggeasy","NbClust","MASS","robustbase","MKinfer","impute","ggheatmap", 
          "janitor", "lme4", "lmtest","matlabr","psych")

# location of the MATLAB binary used later by matlabr::run_matlab_code()
options(matlab.path = "/Applications/MATLAB_R2021b.app/bin")

# project directories, resolved from the project root via here()
codepath <- here("code")
resultpath <- here("results")
plot_path <- here("plots")
source(file.path(codepath, "utils.R"))

# title shown on every plot
plot_title_name <- "IIT"

# weighting-scheme tag embedded in the result file names
WEIGHT <- "none"

# analysis constants
nstimuli <- 700     # total number of stimuli in the OSIE set
fdr_thresh <- 0.05  # FDR significance threshold
nperm <- 1000       # number of permutations

Read in data files

# Build zero-padded stimulus names ("stim001".."stim700") and the matching
# stimulus ID strings ("1001".."1700") used as column names in the result
# files. sprintf() is vectorized, so no explicit loop is needed.
stim_names <- sprintf("stim%03d", seq_len(nstimuli))
stim_ids <- sprintf("1%03d", seq_len(nstimuli))

# read in semantic feature file that tells us which features go along with which stimuli
# (first column = stimulus ID; remaining columns = binary feature indicators)
semantic_features_annot =read.csv(file.path(codepath, "semantic_features.csv"))
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]

# additional custom semantic features for the same stimuli
semantic_features_annot2 =read.csv(file.path(codepath, "semantic_features_custom.csv"))
semantic_features2 = colnames(semantic_features_annot2)[2:ncol(semantic_features_annot2)]

# append the custom feature columns and refresh the feature-name list
# NOTE(review): cbind assumes both files list stimuli in the same row order -- confirm
semantic_features_annot = cbind(semantic_features_annot, semantic_features_annot2[,semantic_features2])
semantic_features = colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]

# compute some extra categories from combinations of existing categories:
# "social" = stimuli with both a face and a human present;
# "nonsocial" = stimuli with neither a human nor an animal
semantic_features_annot$social <- as.numeric(
  semantic_features_annot[["face"]] == 1 & semantic_features_annot[["human"]] == 1
)
semantic_features_annot$nonsocial <- as.numeric(
  semantic_features_annot[["human"]] == 0 & semantic_features_annot[["animal"]] == 0
)
# refresh the feature list so it includes the two new columns
semantic_features <- colnames(semantic_features_annot)[2:ncol(semantic_features_annot)]

# masks for each semantic category: the stimulus IDs carrying that feature
stim_masks <- lapply(semantic_features, function(sem_feat) {
  as.character(semantic_features_annot$stimulus[semantic_features_annot[[sem_feat]] == 1])
})
names(stim_masks) <- semantic_features

# classifier output [0,1] subjects BY stimuli
fname <- file.path(resultpath,
                   sprintf("weight_%s_classification_accuracy_perSubStim.csv", WEIGHT))
classifier_output_sub_by_stim <- read.csv(fname, row.names = 1, na.strings = "NaN")
colnames(classifier_output_sub_by_stim) <- stim_ids

# per-subject masks: which stimuli were correctly classified (== 1) for each subject
sublist <- rownames(classifier_output_sub_by_stim)
fp_masks <- lapply(sublist, function(subid) {
  colnames(classifier_output_sub_by_stim)[classifier_output_sub_by_stim[subid, ] == 1]
})
names(fp_masks) <- sublist

# Read the remaining precomputed result tables. All files encode missing
# values as "NaN" and carry subject/stimulus IDs as row names; stimulus-wise
# columns are renamed to stim_ids for consistent indexing below.

# classifier output [0,1] stimuli BY perm
fname = file.path(resultpath, sprintf("weight_%s_classification_accuracy_perStim_perm.csv",WEIGHT))
classifier_output_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")

# fingerprint ratios subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_fingerprintratios_perSubStim.csv",WEIGHT))
fpratio_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(fpratio_sub_by_stim) = stim_ids

# mean fingerprint ratios stimulus by perm
fname = file.path(resultpath, sprintf("weight_%s_mean_fingerprintratios_perStim_perm.csv",WEIGHT))
mean_fpratio_stim_by_perm = read.csv(fname, row.names=1, na.strings = "NaN")

# intrasubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_intrasubjectR_perSubStim.csv",WEIGHT))
intrasubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(intrasubR_sub_by_stim) = stim_ids

# mean intersubject correlation subject by stimulus
fname = file.path(resultpath, sprintf("weight_%s_mean_intersubjectR_perSubStim.csv",WEIGHT))
mean_intersubR_sub_by_stim = read.csv(fname, row.names=1, na.strings = "NaN")
colnames(mean_intersubR_sub_by_stim) = stim_ids

# nfingerprintable stimuli per subject (raw counts, weighted below)
fname = file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub.csv",WEIGHT))
nfpstim_per_subject_original = read.csv(fname, row.names=1, na.strings = "NaN")

# Weight the nfingerprintable-stimuli counts by each subject's data
# completeness in the intra-subject correlation matrix, so subjects with
# missing data are not over-credited. Uses the nstimuli constant rather than
# a hard-coded 700 for consistency with the rest of the script.
# missing_tot = number of stimuli with complete (non-NA) observations;
# missing_ratio = proportion of complete stimuli.
nfpstim_per_subject_original$missing_tot <- nstimuli - rowSums(is.na(intrasubR_sub_by_stim))
nfpstim_per_subject_original$missing_ratio <- nfpstim_per_subject_original$missing_tot / nstimuli
# scale the raw fingerprintable-stimulus count by the completeness ratio
# (reduces the count for participants with missing data)
nfpstim_per_subject_original <- nfpstim_per_subject_original %>%
  mutate(fpstimuli_ratioed = round(missing_ratio * nfingerprintable_stimuli, 0))

# new data frame holding only the weighted number of fingerprintable stimuli,
# keeping the original subject IDs as row names
nfpstim_per_subject <- data.frame(
  nfingerprintable_stimuli = nfpstim_per_subject_original$fpstimuli_ratioed,
  row.names = rownames(nfpstim_per_subject_original)
)

# nfingerprintable stimuli subject by perm, weighted by the same ratio
fname <- file.path(resultpath, sprintf("weight_%s_classification_nfingerprintableStimuli_perSub_perm.csv",WEIGHT))
nfpstim_sub_by_perm_original <- read.csv(fname, row.names=1, na.strings = "NaN")
nfpstim_sub_by_perm <- round(nfpstim_sub_by_perm_original * nfpstim_per_subject_original$missing_ratio, 0)

# read in phenotypic data
pheno_data <- read.csv(here("data","pheno","pheno_OSIE.csv"))

# Exclude subjects missing intra-subject correlations for >15% of stimuli.
# Uses the nstimuli constant instead of a hard-coded 700.
na_mask <- rowSums(is.na(intrasubR_sub_by_stim)) > (nstimuli * 0.15)
subs2exclude <- rownames(intrasubR_sub_by_stim)[na_mask]
# keep pheno rows for subjects present in the gaze data and not excluded
mask1 <- is.element(pheno_data$subj_ID, rownames(intrasubR_sub_by_stim))
mask2 <- !is.element(pheno_data$subj_ID, subs2exclude)
mask <- mask1 & mask2
pheno_data_sub <- pheno_data %>% filter(mask)
# descriptive statistics for the included sample
describe(pheno_data_sub)
##                     vars   n   mean    sd median trimmed   mad   min    max
## subj_ID*               1 105  53.00 30.45  53.00   53.00 38.55  1.00 105.00
## sex*                   2 105   1.50  0.50   2.00    1.51  0.00  1.00   2.00
## age                    3 105  26.21  6.30  25.00   25.34  4.45 18.00  50.00
## IQ                     4  91 108.65 13.09 110.00  108.99 10.38 74.50 138.00
## test_retest_delay      5 105   7.33  1.46   7.00    7.00  0.00  4.00  16.00
## is_imagelist_shared    6 105   0.22  0.42   0.00    0.15  0.00  0.00   1.00
## S1_CalError_OSIE       7 105   0.48  0.30   0.42    0.44  0.17  0.11   2.59
## S2_CalError_OSIE       8 105   0.48  0.22   0.43    0.45  0.17  0.11   1.38
## Diff_CalError_OSIE     9 105   0.20  0.24   0.14    0.16  0.11  0.00   2.07
## AQ_Tot                10 105  16.92  6.72  16.00   16.55  4.45  4.00  38.00
## AQ_Soc                11 105   3.14  3.05   2.00    2.67  1.48  0.00  13.00
## AQ_Comm               12 105   1.66  1.59   1.00    1.46  1.48  0.00   8.00
## AQ_AttDet             13 105   3.46  1.92   3.00    3.41  2.97  0.00   7.00
## SRS_Tot_Raw           14 105  45.16 22.77  42.00   43.28 19.27  9.00 151.00
## SRS_DSM5_SOC          15 105  26.18 13.51  23.00   25.00 11.86  5.00  83.00
## SRS_DSM5_RRB          16 105  18.98 10.66  18.00   18.02 10.38  2.00  68.00
## SRS_Emp_ER            17 105  18.11  8.71  17.00   17.59  7.41  3.00  46.00
## SRS_Emp_Avoid         18 105   4.99  3.47   4.00    4.59  2.97  0.00  21.00
## SRS_Emp_IR            19 105   3.08  2.79   2.00    2.66  1.48  0.00  16.00
## SRS_Emp_Same          20 105  14.28  7.66  13.00   13.56  7.41  1.00  46.00
## SRS_Emp_RM            21 105   4.70  3.62   4.00    4.34  2.97  0.00  22.00
## SQ_Tot                22  98  56.43 17.52  53.50   55.23 14.83 22.00 114.00
##                      range  skew kurtosis   se
## subj_ID*            104.00  0.00    -1.23 2.97
## sex*                  1.00 -0.02    -2.02 0.05
## age                  32.00  1.41     2.09 0.62
## IQ                   63.50 -0.32    -0.15 1.37
## test_retest_delay    12.00  3.76    16.15 0.14
## is_imagelist_shared   1.00  1.34    -0.21 0.04
## S1_CalError_OSIE      2.47  3.77    22.93 0.03
## S2_CalError_OSIE      1.28  1.31     2.13 0.02
## Diff_CalError_OSIE    2.07  4.79    31.75 0.02
## AQ_Tot               34.00  0.76     1.14 0.66
## AQ_Soc               13.00  1.33     1.22 0.30
## AQ_Comm               8.00  1.30     1.95 0.16
## AQ_AttDet             7.00  0.12    -1.00 0.19
## SRS_Tot_Raw         142.00  1.37     3.74 2.22
## SRS_DSM5_SOC         78.00  1.20     2.41 1.32
## SRS_DSM5_RRB         66.00  1.26     3.16 1.04
## SRS_Emp_ER           43.00  0.72     0.54 0.85
## SRS_Emp_Avoid        21.00  1.39     3.25 0.34
## SRS_Emp_IR           16.00  1.90     4.77 0.27
## SRS_Emp_Same         45.00  1.10     2.06 0.75
## SRS_Emp_RM           22.00  1.37     3.65 0.35
## SQ_Tot               92.00  0.76     0.41 1.77
# sex distribution of the included sample
table(pheno_data_sub$sex)
## 
##  F  M 
## 52 53

Compute summary identification matrices by semantic features

# Assemble one long MATLAB program as a string (executed below via
# matlabr::run_matlab_code, which needs options(matlab.path) set above).
# Step 1: cd into the code directory and load the gaze-similarity results.
code2run = sprintf("cd %s", codepath)
code2run = sprintf("%s; resultpath = '%s'",code2run, resultpath)
code2run = sprintf("%s; load('%s')",code2run, file.path(resultpath, "weight_none_gsa_res.mat"))

# Step 2: symmetrize each per-stimulus identification matrix, take the median
# across all stimuli per subject pair, and write the "all" matrix to CSV.
code2run = sprintf("%s; id_mat = gsa_res.id_mat; id_mat_symm = id_mat; tmp_mat = squeeze(id_mat(:,:,1)); mask = tril(tmp_mat)~=0; for istim = 1:size(id_mat,3); tmp_mat = squeeze(id_mat(:,:,istim)); tmp_mat_rev = tmp_mat'; tmp_mat2use = tmp_mat; tmp_mat2use(mask) = tmp_mat_rev(mask); id_mat_symm(:,:,istim) = tmp_mat2use; end; id_mat = id_mat_symm; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,:))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct; tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,'weight_none_id_mat_all.csv'); writetable(tab2write, file2write)
",code2run)

# Step 3: load the semantic-feature table inside MATLAB.
code2run = sprintf("%s; semantic_features = readtable('%s')", code2run, file.path(resultpath, "semantic_features.csv"))
code2run = sprintf("%s; features2use = semantic_features.Properties.VariableNames(3:end)", code2run)

# Step 4: repeat the median-across-stimuli summary restricted to the stimuli
# carrying each semantic feature, writing one CSV per feature, then exit.
code2run = sprintf("%s; for i=1:length(features2use); curr_feature = features2use{i}; disp(curr_feature); feature_mask = semantic_features.(curr_feature)==1; tmp_ct = nan(size(id_mat,1),size(id_mat,2)); for isub = 1:size(id_mat,1); tmp_mat = squeeze(id_mat(isub,:,feature_mask))'; tmp_ct(isub,:) = nanmedian(tmp_mat,1); end; data2use = tmp_ct;  tab2write = cell2table([gsa_res.subids, num2cell(data2use)], 'VariableNames',[{'subid'},gsa_res.subids']); file2write = fullfile(resultpath,['weight_none_id_mat_',curr_feature,'.csv']); writetable(tab2write, file2write); end; exit", code2run)

# run the assembled MATLAB program
res = run_matlab_code(code2run)

Read in semantic features and cluster them

# heatmap of semantic feature matrix to see which semantic features are usually co-occurring
# (drop the first column, which holds the stimulus ID)
df_sem_feat = as.matrix(semantic_features_annot[,2:ncol(semantic_features_annot)])

# clustering settings: Ward linkage on binary distances, with the silhouette
# index used by NbClust to pick the number of clusters
clust_method = "ward.D2"
dist_method = "binary" 
nbc_index = "silhouette"

# cluster stimuli (rows of the feature matrix)
stim_feat_res = NbClust(data = as.matrix(df_sem_feat), 
                       index = nbc_index, 
                       distance = dist_method, 
                       method = clust_method, 
                       min.nc=2, max.nc=6)
k_stim = stim_feat_res$Best.nc
# one Set1 color per stimulus cluster, for heatmap row annotation
row_cols = brewer.pal(k_stim[[1]], "Set1")[stim_feat_res$Best.partition]

# add clustering solution to semantic_features_annot
cluster_df = data.frame(stimulus = factor(semantic_features_annot$stimulus), 
                        cluster = factor(stim_feat_res$Best.partition))
semantic_features_annot = merge(cluster_df,semantic_features_annot[, c("stimulus",semantic_features)], by = "stimulus") 

# cluster semantic features
sem_feat_res = NbClust(data = as.matrix(t(df_sem_feat)), 
                       index = nbc_index, 
                       distance = dist_method, 
                       method = clust_method, 
                       min.nc=2, max.nc=6)
k_sem_feat = sem_feat_res$Best.nc
col_cols = brewer.pal(k_sem_feat[[1]], "Set1")[sem_feat_res$Best.partition]

# make heatmap
stim_hmap_res = heatmap(as.matrix(df_sem_feat), 
        hclustfun=function(d) hclust(d, method=clust_method), 
        scale = "none", 
        RowSideColors=row_cols, 
        ColSideColors=col_cols)

stim_order_dendrogram = stim_hmap_res$rowInd
stim_hmap_res
## $rowInd
##   [1] 298 508 675 128 683  24 414   7 259 307 643 526  38 452  57  31 129 496
##  [19] 208  76  54  19  73 264  37 594   9  82 122 635 622 551 507 375 214 175
##  [37] 202 528 193  68   8 349 672 311 473 273  80 231 599 490 299 399 276 198
##  [55] 564 404 377 123  12  97 598 515 174 424 519 645 166  87 337 179 699 625
##  [73] 489 417 324 322 316 309 171 211 652 541  29 488 293 403 280 654 353 116
##  [91] 314 591 655 476 266 134 230 471 323 251 245 199  92  91   5  16 487 659
## [109] 658 423 192  30 151 677 554  93 250 317  67  15  35 595 410  40 234 426
## [127] 262 416 644 315  21  43 272 137  49   1  26 687 664 653 607 559 557 523
## [145] 362 334 305 281 152 239 449 347  62 243 104 690  98  36  86  79 691 673
## [163] 556 548 499 384 105 109 604 671 651 458 363 328 209 120 153 638 624 383
## [181] 492 670  99 144 133 393 456 391 149 387 674 428 235 256 369 612 469  14
## [199] 275 520 678 400 589 581  65 118 642 306 371 665 660 534 634 143 667 381
## [217] 210 247 288 516 656 270  44 246 696  17 447 646  22 165 356 374 341  90
## [235] 390 279  61 150 662 558 261 457 578 613 583 350 479  52 530 700 682 420
## [253] 146 163 265 623 605 584 572 411 402 348 228 115  85   3  72 681 666 112
## [271] 318  81 278 629 609 582 346 302 289  25  84 514 320 453 693  58 639  69
## [289] 248 689 533 330 292  41 139 597 335 697 333 425 686 577 465 232 359 148
## [307] 552 434 477 161 580 373  60  71 310 421  63 100 661 332 647 257 611 159
## [325] 140 561 506 329 180  45 110 200 154 540 370 562 650 549 167 537 531 444
## [343] 338 282 156 147 354 158 679 585 368 263 698 503   4 379 602 657 430 569
## [361] 441 626 429 475 188 297 119 177 627  88 186 521  74 249 380 252 269 633
## [379] 603 482 408 191 244 571 542 497 450 419 212  83 203 308 355 344 205 481
## [397] 361 593 445 461 553 610 227 178 187 409 576 398 543 418 570 685 529 692
## [415] 579 459 588 586 493 467 451 385 366 378 242 285 641  46 164 630 592 454
## [433] 241 224 184  27 172 535 443 422 219  47  55 640 460 213 206  94 101 437
## [451] 201  33 225 486 223 621 412 494 518 168 466 560 357 616 319 448 107 325
## [469] 608 614 401 547 326 303 301  59 233 573 483 238 162 131 114 121 480 204
## [487] 536 505 512 555 485 498 550 501 502 565 382 649 510 491 440 389 395  48
## [505]  70 376 522 567 406 468 255 260 436 364 258 226  56 194 296 511 439 539
## [523] 372 394 669  78 217 396 504 221 397 111 196 351 185  50 222 636 606 600
## [541] 566 478 474 435 343 294 220 197 176 155  10  18 574 431 283  32 267 568
## [559] 253 271 313 618 601 538  53 321 500 632 495 472 470 463 438 427 415 342
## [577] 284 268 181  13 160 587 290 173 237 102 563 336 513 631 388 527  77 525
## [595]   2 545 637   6 215 145 157  64  95 170  11 130 620 517 433  89 358 345
## [613] 142 339 446 442 125 386 340 432 124 367 464 141 182 254 407 405 676 195
## [631] 286 277 392 413 546 240 103 138 695 236 229  66  51 360 189 365 617 615
## [649] 663 352 532 484 509 619 304  42 596 108 218 524 127 327 680 291 300 668
## [667] 106 113  96 117 688 648 455 183 287 590 544 190 216 462 207 136 331 132
## [685] 575 694  34 684 126 295 628  23  39 312 135 169  75 274  28  20
## 
## $colInd
##  [1]  1 10 14  3  4  5  7  9 16  6  8 12 11  2 15 13
## 
## $Rowv
## NULL
## 
## $Colv
## NULL
# Summarize the stimulus clustering: per-cluster stimulus counts plus the
# named plotting color associated with each cluster label.
cluster_res <- data.frame(clust_labels = stim_feat_res$Best.partition,
                          clust_colors = row_cols)
ctab <- as.data.frame(table(cluster_res$clust_labels))
for (i in seq_len(k_stim[[1]])) {
  in_clust <- cluster_res$clust_labels == i
  tmp_col <- as.character(unique(cluster_res$clust_colors[in_clust]))
  ctab$color[i] <- plotrix::color.id(tmp_col)
  ctab$clust_num[i] <- unique(cluster_res$clust_labels[in_clust])
}
ctab
##   Var1 Freq      color clust_num
## 1    1  158 firebrick2         1
## 2    2  542  steelblue         2
# per-subject counts of fingerprintable stimuli within each stimulus cluster
nfpstim_per_subject$c1 <- rowSums(classifier_output_sub_by_stim[, cluster_res$clust_labels == 1])
nfpstim_per_subject$c2 <- rowSums(classifier_output_sub_by_stim[, cluster_res$clust_labels == 2])

# get sum of stimuli within semantic clusters and plot as a word cloud
sem_feat_cols <- unique(col_cols)
stim_cols <- unique(row_cols)

Can we gaze fingerprint people?

# function to do gaze fingerprint classification
#
# Reads an identification matrix (subjects x subjects similarity scores, with
# subject IDs as row names and the same subjects in column order) from
# file2use, drops subjects flagged in na_mask, and classifies a subject as
# "identified" when the maximum of their row falls on the diagonal (i.e., the
# subject is most similar to themselves). Optionally builds a permutation
# null by shuffling the row order nperm times.
#
# Args:
#   file2use: path to the CSV identification matrix.
#   na_mask:  logical vector; TRUE = exclude that subject (too much missing data).
#   nperm:    number of row-shuffling permutations for the null (0 = skip).
#
# Returns a 1-row data.frame with columns accuracy, mean_null, pval
# (mean_null and pval stay NA when nperm == 0).
gaze_fingerprint_classifier <- function(file2use, na_mask, nperm=0){

  # helper: fraction of rows whose maximum sits on the diagonal.
  # which.max() is used instead of which(x == max(x)) so that ties resolve
  # deterministically to the first maximum and NA entries are tolerated,
  # rather than producing a length != 1 assignment error.
  identification_accuracy <- function(id_mat) {
    n = nrow(id_mat)
    pred = vapply(seq_len(n), function(isub) {
      which.max(unlist(id_mat[isub, ], use.names = FALSE))
    }, integer(1))
    sum(pred == seq_len(n)) / n
  }

  tmp_data = read.csv(file2use, row.names=1)
  # drop excluded subjects from both rows and columns
  tmp_data = tmp_data[!na_mask, !na_mask]
  accuracy = identification_accuracy(tmp_data)

  result = data.frame(matrix(nrow = 1, ncol = 3))
  colnames(result) = c("accuracy", "mean_null", "pval")
  result$accuracy = accuracy

  if (nperm>0){
    subids = rownames(tmp_data)
    perm_accuracy = numeric(nperm)  # preallocated null distribution
    for (iperm in 1:nperm){
      set.seed(iperm)  # reproducible shuffle per permutation
      rand_perm = sample(length(subids))
      # shuffle rows only, so the diagonal no longer matches by construction
      perm_accuracy[iperm] = identification_accuracy(tmp_data[subids[rand_perm],])
    }

    # permutation p-value with +1 correction (never exactly zero)
    result$pval = (sum(perm_accuracy >= accuracy) + 1)/(nperm + 1)
    result$mean_null = mean(perm_accuracy)
  }

  return(result)
} # function gaze_fingerprint_classifier

# subjects to remove because of too many NAs (>15% of stimuli missing;
# nstimuli replaces the hard-coded 700 used elsewhere)
na_mask <- rowSums(is.na(intrasubR_sub_by_stim)) > (nstimuli * 0.15)

# run the fingerprint classifier on the full identification matrix ("all")
# and once per semantic-feature subset
features2use <- c("all", semantic_features)

gfp_res <- data.frame(matrix(nrow = length(features2use), ncol = 3))
colnames(gfp_res) <- c("accuracy", "mean_null", "pval")
rownames(gfp_res) <- features2use
for (ifeature in seq_along(features2use)){
  file2use <- file.path(resultpath,sprintf("weight_%s_id_mat_%s.csv",WEIGHT, features2use[ifeature]))
  gfp_res[ifeature,] <- gaze_fingerprint_classifier(file2use, na_mask, nperm=nperm)
}
gfp_res
##               accuracy   mean_null        pval
## all          0.5047619 0.009571429 0.000999001
## text         0.5142857 0.009304762 0.000999001
## face         0.5047619 0.009352381 0.000999001
## emotion      0.4000000 0.009266667 0.000999001
## sound        0.2761905 0.009504762 0.000999001
## smell        0.4571429 0.009600000 0.000999001
## taste        0.5238095 0.009714286 0.000999001
## touch        0.4857143 0.009676190 0.000999001
## motion       0.4761905 0.009266667 0.000999001
## operability  0.4952381 0.009590476 0.000999001
## watchability 0.5238095 0.009314286 0.000999001
## touched      0.4857143 0.009209524 0.000999001
## gazed        0.4571429 0.009428571 0.000999001
## human        0.5047619 0.009542857 0.000999001
## animal       0.3904762 0.009647619 0.000999001
## social       0.5047619 0.009533333 0.000999001
## nonsocial    0.4857143 0.009257143 0.000999001
# Build the per-feature plotting data: drop the "all" row and attach the
# semantic-feature cluster label plus its Set1 hex color to each feature.
data2plot <- gfp_res
data2plot$feature <- rownames(gfp_res)
data2plot <- data2plot %>% filter(!feature=="all")
data2plot$cluster <- NA

# feature groupings by cluster
red_clust <- c("text","watchability")
yellow_clust <- c("animal")
green_clust <- c("emotion","sound")
purple_clust <- c("smell","operability","nonsocial","taste")
blue_clust <- c("human","touched","social","face","motion","gazed")
orange_clust <- c("touch")

# lookup table mapping each cluster name to its features and hex color;
# assignment order matches the original per-cluster statements
clust_lookup <- list(
  red    = list(feats = red_clust,    hex = "#E41A1C"),
  blue   = list(feats = blue_clust,   hex = "#377EB8"),
  green  = list(feats = green_clust,  hex = "#4DAF4A"),
  purple = list(feats = purple_clust, hex = "#984EA3"),
  orange = list(feats = orange_clust, hex = "#FF7F00"),
  yellow = list(feats = yellow_clust, hex = "#FFFF33")
)
for (clust_name in names(clust_lookup)) {
  hit <- is.element(data2plot$feature, clust_lookup[[clust_name]]$feats)
  data2plot$cluster[hit] <- clust_name
  data2plot$cluster_color[hit] <- clust_lookup[[clust_name]]$hex
}

data2plot$cluster <- factor(data2plot$cluster)
# fixed display order for the y-axis of the accuracy bar plot
data2plot$feature <- factor(data2plot$feature, levels = rev(c("taste","watchability","text","face","human","social","operability","touch","touched","nonsocial","motion","smell","gazed","emotion","animal","sound")))

# load in bootstrap accuracy for all stimuli (95% CIs)
bootaccall = read.csv(file.path(resultpath,"bootstrap_accuracy_95CIs.csv"))

# bar plot of per-feature accuracy, with the all-stimuli accuracy (solid
# line) and its bootstrap 95% CI (dashed lines) overlaid
p = ggplot(data = data2plot, aes(y = feature, x = accuracy, fill = cluster)) + geom_bar(stat = "identity") + geom_vline(xintercept=gfp_res["all","accuracy"]) + geom_vline(xintercept=bootaccall[1,"low95"], linetype = "longdash") + geom_vline(xintercept=bootaccall[1,"hi95"], linetype = "longdash") + scale_fill_manual(values = c("#377EB8","#4DAF4A","#FF7F00","#984EA3","#E41A1C","#FFFF33")) + ylab("Semantic Feature") + xlab("Accuracy") + coord_cartesian(xlim=c(0.25,0.80)) + guides(fill="none") + ggtitle(plot_title_name) + easy_center_title()
# pass the plot explicitly: without plot = p, ggsave() saves the last
# *displayed* plot, and p has not been printed yet at this point
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_global_semantic_features.pdf"),
       plot = p,
       width = 4, height = 5)
p

# word cloud of features sized by accuracy (^12 exaggerates the spread so
# size differences are visible)
wordcloud(words = data2plot$feature, 
                  freq = data2plot$accuracy^12, 
                  random.order = FALSE, 
                  rot.per=0, 
                  colors=data2plot$cluster_color, 
                  ordered.colors=TRUE)

# Per-stimulus fingerprinting accuracy across retained subjects, with
# permutation-based p-values and FDR correction.
df_res <- classifier_output_sub_by_stim
df_res <- df_res[!na_mask,]
df_res_perm <- classifier_output_stim_by_perm
nperm <- ncol(df_res_perm)   # number of permutations actually available

subs2use <- rownames(df_res)
df2plot <- data.frame(stim_names = stim_names,
                      stim_ids = stim_ids,
                      accuracy = colSums(df_res[,stim_ids])/nrow(df_res),
                      site="IIT")
df2plot$site <- factor(df2plot$site)

# permutation p-value per stimulus (+1 correction, never exactly zero)
df2plot$pval <- vapply(seq_len(nstimuli), function(istim) {
  (sum(df_res_perm[istim,] >= df2plot$accuracy[istim]) + 1)/(nperm + 1)
}, numeric(1))

# FDR-correct and flag fingerprintable stimuli
df2plot$fdr <- p.adjust(df2plot$pval, method = "fdr")
df2plot$fingerprint <- "No"
df2plot$fingerprint[df2plot$fdr<=fdr_thresh] <- "Yes"

# attach the semantic annotations (including the cluster label) per stimulus
df2plot <- merge(df2plot, semantic_features_annot, by.x = "stim_ids", by.y= "stimulus")
df2plot$stim_cluster_name <- "C2"
df2plot$stim_cluster_name[df2plot$cluster==1] <- "C1"

## Accuracy to fingerprintability plots

# make accuracy plot across all stimuli, split by fingerprintability.
# guides(colour = "none") replaces the deprecated guides(colour = FALSE),
# matching the guides(fill = "none") style used elsewhere in this file.
p1 = ggplot(data = df2plot, aes(x = fingerprint, y = accuracy, colour=fingerprint)) + 
  geom_jitter(width=0.1, alpha = 0.5) + 
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") + 
  scale_colour_manual(values = c("orange","dodger blue")) + guides(colour="none") +
  xlab("Fingerprintable") + 
  ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()

# make accuracy plot, but color according to stimulus cluster (C1 vs C2)
p2 = ggplot(data = df2plot, aes(x = stim_cluster_name, y = accuracy, colour=stim_cluster_name)) +
  geom_jitter(width=0.1, alpha = 0.5) + 
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), fill=NA, colour="black") + 
  guides(colour="none") + scale_colour_manual(values = c("#E41A1C","#377EB8")) + 
  xlab("Stimulus Cluster") + 
  ylab("Accuracy") + ggtitle(plot_title_name) + easy_center_title()

# combine the two panels side-by-side (patchwork)
p_final = p1+p2
p_final

# tabulate number of stimuli where fingerprinting is possible or not
table(df2plot$fingerprint)
## 
##  No Yes 
##  38 662
# what percentage of the 700 stimuli allow us to gaze fingerprint?
sum(df2plot$fingerprint=="Yes")/nstimuli
## [1] 0.9457143
# tabulate number of stimuli where fingerprinting is possible or not, by stimulus cluster
table(df2plot$fingerprint, df2plot$stim_cluster_name)
##      
##        C1  C2
##   No   13  25
##   Yes 145 517
# chi-square test on that fingerprint by stimulus cluster contingency table
chisq.test(table(df2plot$fingerprint, df2plot$stim_cluster_name))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(df2plot$fingerprint, df2plot$stim_cluster_name)
## X-squared = 2.4502, df = 1, p-value = 0.1175
# t-test comparing accuracy of C2 vs C1 stimuli
t.test(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"])
## 
##  Welch Two Sample t-test
## 
## data:  df2plot$accuracy[df2plot$stim_cluster_name == "C2"] and df2plot$accuracy[df2plot$stim_cluster_name == "C1"]
## t = 4.3784, df = 292.24, p-value = 1.667e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.004113387 0.010830929
## sample estimates:
##  mean of x  mean of y 
## 0.04707433 0.03960217
# Cohen's d effect size for accuracy of C2 vs C1 stimuli
# NOTE(review): cohens_d() is not a base function and psych provides cohen.d,
# not cohens_d -- presumably defined in utils.R; confirm.
dres = cohens_d(df2plot$accuracy[df2plot$stim_cluster_name=="C2"], df2plot$accuracy[df2plot$stim_cluster_name=="C1"]); dres
## [1] 0.3643683
## [1] 0.3643683
# Accuracies across all semantic feature categories
cols2use = c("feature","accuracy","std","sem")
result_df = data.frame(matrix(nrow = length(semantic_features), ncol = length(cols2use)))
rownames(result_df) = semantic_features
colnames(result_df) = cols2use

for (sem_feat in semantic_features){
  mask = semantic_features_annot[sem_feat]==1
  stims2use = stim_ids[mask]
  result_df[sem_feat,"feature"] = sem_feat
  result_df[sem_feat,"accuracy"] = mean(colSums(df_res[stims2use])/dim(df_res)[1])
  result_df[sem_feat,"std"] = sd(colSums(df_res[,stims2use])/dim(df_res)[1])
  result_df[sem_feat,"sem"] = sd(colSums(df_res[,stims2use])/dim(df_res)[1])/sqrt(sum(mask))
}

# Fingerprint Ratios across all semantic feature categories
cols2use = semantic_features
result_df_fpr = data.frame(matrix(nrow = dim(fpratio_sub_by_stim)[1], ncol = length(semantic_features)))
rownames(result_df_fpr) = rownames(fpratio_sub_by_stim)
colnames(result_df_fpr) = cols2use

for (sem_feat in semantic_features){
  mask = semantic_features_annot[sem_feat]==1
  stims2use = stim_ids[mask]
  result_df_fpr[,sem_feat] = rowMeans(fpratio_sub_by_stim[,stims2use], na.rm= TRUE)
}
result_df_fpr$subid = rownames(result_df_fpr)
df2plot = melt(result_df_fpr, id.vars = "subid")

# order features by mean accuracy for plotting
result_df$feature = with(result_df, reorder(feature, accuracy))
# attach each feature's cluster assignment from the NbClust solution
feature_clusters = data.frame(feature = names(sem_feat_res$Best.partition), cluster = sem_feat_res$Best.partition)
result_df = merge(result_df, feature_clusters, by = "feature")
result_df$cluster = factor(result_df$cluster) 
result_df
##         feature   accuracy        std          sem cluster
## 1        animal 0.04551639 0.02063842 0.0016630911       6
## 2       emotion 0.04986395 0.02169390 0.0018334691       3
## 3          face 0.04716205 0.02112236 0.0009253872       2
## 4         gazed 0.04727891 0.02071741 0.0014798149       2
## 5         human 0.04765890 0.02113001 0.0009664643       2
## 6        motion 0.04523063 0.02033629 0.0011386137       2
## 7     nonsocial 0.03960217 0.01816334 0.0014449980       4
## 8   operability 0.04395238 0.01983713 0.0014026970       4
## 9         smell 0.04342038 0.02305053 0.0023902282       4
## 10       social 0.04780620 0.02108902 0.0009854252       2
## 11        sound 0.04668228 0.01753562 0.0022452057       3
## 12        taste 0.04554205 0.02312023 0.0016862164       4
## 13         text 0.04862563 0.02183029 0.0013918483       1
## 14        touch 0.04169628 0.01934604 0.0013645644       5
## 15      touched 0.04784736 0.02132391 0.0012478875       2
## 16 watchability 0.04617389 0.02091404 0.0010430976       1
# bar plot of per-feature accuracy with SEM error bars, colored by feature
# cluster. guides(fill = "none") replaces the deprecated guides(fill = FALSE).
p = ggplot(data = result_df, aes(y = feature, x = accuracy, fill = cluster)) + 
  geom_bar(stat = "identity") + 
  geom_errorbar(aes(xmin=accuracy-sem, xmax=accuracy+sem), width=0.2) +
  guides(fill="none") + 
  coord_cartesian(xlim = c(0.035, 0.09)) + 
  xlab("Accuracy") + ylab("Semantic Feature") + 
  scale_fill_manual(values = unique(col_cols)) + ggtitle(plot_title_name) + easy_center_title()
# pass the plot explicitly: without plot = p, ggsave() saves the last
# *displayed* plot, and p has not been printed yet at this point
ggsave(filename = file.path(plot_path, "fingerprint_accuracy_local_semantic_features.pdf"),
       plot = p,
       width = 4, height = 5)
p

How many stimuli can we use to gaze fingerprint an individual and how does this vary between people?

# Per-subject test: does each subject have more fingerprintable stimuli than
# expected by chance under the permutation null?
fp_res = nfpstim_per_subject
fp_res$subids = rownames(fp_res)
fp_perm_res = nfpstim_sub_by_perm

# exclude subjects missing data for >15% of stimuli (same criterion as
# earlier; nstimuli replaces the hard-coded 700)
na_mask = rowSums(is.na(intrasubR_sub_by_stim)) > (nstimuli * 0.15)
fp_res = fp_res %>% filter(!na_mask)
fp_perm_res = fp_perm_res %>% filter(!na_mask)

# figure out p-values for each subject based on permutation nfingerprintable
# stim (+1 correction; assumes ncol(fp_perm_res) equals the nperm set above
# -- TODO confirm)
for (isub in seq_len(nrow(fp_res))){
  fp_res$pval[isub] = (sum(fp_perm_res[isub,]>=fp_res$nfingerprintable_stimuli[isub])+1)/(nperm+1)
}

fp_res$fdr = p.adjust(fp_res$pval, method = "fdr")

fp_res$Fingerprintable = "Yes"
fp_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"
# NOTE(review): fp_perm_res rows are flagged using fp_res$fdr, i.e. by the
# matching subject's observed FDR -- presumably intentional since both frames
# were filtered identically and are row-aligned by subject; confirm.
fp_perm_res$Fingerprintable = "Yes"
fp_perm_res$Fingerprintable[fp_res$fdr>fdr_thresh] = "No"

# subjects who have significantly more nfingerprintable stimuli than expected by chance
fp_subs2include = fp_res$subids[fp_res$fdr<=fdr_thresh]
print(sprintf("%d subjects of %d with statistically significant number of fingerprintable stimuli",
              length(fp_subs2include), 
              dim(fp_res)[1]))
## [1] "99 subjects of 105 with statistically significant number of fingerprintable stimuli"
# subjects whose nfingerprintable stimuli is no better than chance
fp_subs2exclude = fp_res$subids[fp_res$fdr>fdr_thresh] 
print(sprintf("%d subjects of %d with non-statistically significant number of fingerprintable stimuli",
              length(fp_subs2exclude), 
              dim(fp_res)[1]))
## [1] "6 subjects of 105 with non-statistically significant number of fingerprintable stimuli"
# sort subjects by their weighted number of fingerprintable stimuli
reorder_vect = order(fp_res[,"nfingerprintable_stimuli"])
ordered_fp_res_subs = rownames(fp_res)[reorder_vect]
ordered_fp_res = fp_res[ordered_fp_res_subs,]
ordered_fp_res
##            nfingerprintable_stimuli c1 c2     subids        pval        fdr
## a8681Py534                        0  0  0 a8681Py534 1.000000000 1.00000000
## hvu5113B2r                        4  0  4 hvu5113B2r 0.469530470 0.47404519
## j51h15E4xv                        7  3  4 j51h15E4xv 0.111888112 0.11406070
## nxaQkud83t                        9  4  5 nxaQkud83t 0.049950050 0.05244755
## Wj74c75671                        9  2  7 Wj74c75671 0.042957043 0.04602540
## T1358ru144                       10  4  7 T1358ru144 0.006993007 0.01234060
## b9u81qz6G5                       11  2  9 b9u81qz6G5 0.097902098 0.10078157
## b57665B367                       11  3  8 b57665B367 0.030969031 0.03422893
## P9a579it51                       11  4  7 P9a579it51 0.061938062 0.06439105
## M3go34rg95                       11  4  8 M3go34rg95 0.004995005 0.01234060
## Yjkv6qz9d1                       11  1 11 Yjkv6qz9d1 0.016983017 0.01981352
## I89n948112                       13  3 12 I89n948112 0.045954046 0.04873914
## o77M6w8m26                       13  4 11 o77M6w8m26 0.029970030 0.03347716
## b5F118a654                       14  3 12 b5F118a654 0.009990010 0.01380199
## b1l2a37p3Z                       14  4 11 b1l2a37p3Z 0.038961039 0.04217432
## aadbq388Ob                       15  2 13 aadbq388Ob 0.021978022 0.02508361
## ytbukz3wX4                       15  4 11 ytbukz3wX4 0.014985015 0.01808536
## tu2b9S1ms9                       15  4 11 tu2b9S1ms9 0.016983017 0.01981352
## u17w5462Em                       17  5 12 u17w5462Em 0.010989011 0.01424501
## Fljc85x4j4                       17  4 14 Fljc85x4j4 0.000999001 0.01234060
## okj19fo9Fh                       17  5 12 okj19fo9Fh 0.004995005 0.01234060
## ewv9511u6J                       17  3 14 ewv9511u6J 0.002997003 0.01234060
## x5Ny88741k                       17  2 15 x5Ny88741k 0.007992008 0.01234060
## vhs2B895gm                       18  3 15 vhs2B895gm 0.007992008 0.01234060
## I28284j41f                       18  4 14 I28284j41f 0.003996004 0.01234060
## ggr1delQ9y                       18  5 14 ggr1delQ9y 0.012987013 0.01585624
## fgJ6xw7668                       18  5 14 fgJ6xw7668 0.007992008 0.01234060
## x75646ye9T                       19  4 15 x75646ye9T 0.006993007 0.01234060
## wG7615jxji                       19  4 15 wG7615jxji 0.006993007 0.01234060
## y9s1dvTq2q                       19  2 17 y9s1dvTq2q 0.002997003 0.01234060
## neCz5h3c81                       19  1 18 neCz5h3c81 0.003996004 0.01234060
## c2i1N145o5                       19  5 14 c2i1N145o5 0.007992008 0.01234060
## m3837o9wA8                       20  3 17 m3837o9wA8 0.004995005 0.01234060
## z4z93Q7hs2                       20  4 16 z4z93Q7hs2 0.012987013 0.01585624
## z4wsu3Z4l2                       20  3 17 z4wsu3Z4l2 0.010989011 0.01424501
## hy2wC38x85                       20  6 14 hy2wC38x85 0.003996004 0.01234060
## xj3R2c13ha                       21  3 18 xj3R2c13ha 0.004995005 0.01234060
## ox17xwBqy6                       21  7 14 ox17xwBqy6 0.009990010 0.01380199
## j6r171siTa                       21 10 11 j6r171siTa 0.001998002 0.01234060
## Zd122o2d96                       21  4 17 Zd122o2d96 0.017982018 0.02074848
## p15y46v54P                       22  1 21 p15y46v54P 0.002997003 0.01234060
## uw968g1F9o                       22  3 19 uw968g1F9o 0.002997003 0.01234060
## s145O1udx1                       23  6 18 s145O1udx1 0.031968032 0.03496503
## gJuiz877gn                       23  9 15 gJuiz877gn 0.005994006 0.01234060
## S91d18oh3x                       23  4 21 S91d18oh3x 0.005994006 0.01234060
## x1J5788214                       23  6 18 x1J5788214 0.003996004 0.01234060
## tx43oiW5ps                       24  3 21 tx43oiW5ps 0.003996004 0.01234060
## j1218Kcj5g                       24  4 21 j1218Kcj5g 0.011988012 0.01535050
## ax5I99lf78                       25  2 23 ax5I99lf78 0.002997003 0.01234060
## Opb6k5eke1                       25  4 21 Opb6k5eke1 0.024975025 0.02819761
## b83u91R6q5                       25  2 23 b83u91R6q5 0.001998002 0.01234060
## dKfyt77oq4                       25  3 22 dKfyt77oq4 0.005994006 0.01234060
## jmHk8186fv                       25  9 16 jmHk8186fv 0.005994006 0.01234060
## e813o11n7T                       26  7 19 e813o11n7T 0.004995005 0.01234060
## M97oq2u9z2                       26  2 26 M97oq2u9z2 0.007992008 0.01234060
## p6o2qe1xQt                       28  4 24 p6o2qe1xQt 0.008991009 0.01329656
## kju391wN3s                       28  9 19 kju391wN3s 0.007992008 0.01234060
## yzfoH84397                       30  8 22 yzfoH84397 0.003996004 0.01234060
## jT2r93k9m8                       31  7 26 jT2r93k9m8 0.005994006 0.01234060
## uxxiod4U24                       31  4 31 uxxiod4U24 0.002997003 0.01234060
## k7ifvzH4yx                       31  7 24 k7ifvzH4yx 0.009990010 0.01380199
## tb51gQ2f32                       32  9 23 tb51gQ2f32 0.003996004 0.01234060
## qixzE71638                       33  8 25 qixzE71638 0.004995005 0.01234060
## b1Oe7158q9                       33  7 27 b1Oe7158q9 0.004995005 0.01234060
## Yoea9j7gg5                       33  3 30 Yoea9j7gg5 0.007992008 0.01234060
## wUe1ha8l71                       35  7 28 wUe1ha8l71 0.004995005 0.01234060
## jbypJkj3a8                       35  4 31 jbypJkj3a8 0.010989011 0.01424501
## wfvbs123Wn                       36  9 28 wfvbs123Wn 0.010989011 0.01424501
## c192S19qf7                       37 14 26 c192S19qf7 0.005994006 0.01234060
## aSzdcei682                       37  6 31 aSzdcei682 0.009990010 0.01380199
## q241I76nd4                       37 10 27 q241I76nd4 0.001998002 0.01234060
## hB3nx98zae                       38  6 32 hB3nx98zae 0.007992008 0.01234060
## a34K18e29d                       39  5 34 a34K18e29d 0.009990010 0.01380199
## ddf1bWbguf                       40 10 31 ddf1bWbguf 0.003996004 0.01234060
## Z8m99686m1                       42  8 37 Z8m99686m1 0.005994006 0.01234060
## Valj2s14g8                       42  9 33 Valj2s14g8 0.006993007 0.01234060
## yr8lL9pd54                       42  9 33 yr8lL9pd54 0.005994006 0.01234060
## P736124up6                       43  6 38 P736124up6 0.001998002 0.01234060
## wc8b25ckoY                       43 12 31 wc8b25ckoY 0.007992008 0.01234060
## y779g7Rjjm                       44  8 36 y779g7Rjjm 0.006993007 0.01234060
## u44Dr4gs86                       44  8 36 u44Dr4gs86 0.004995005 0.01234060
## nI83m4382t                       44  6 43 nI83m4382t 0.000999001 0.01234060
## y7g3x8266T                       46  9 38 y7g3x8266T 0.007992008 0.01234060
## G62a6q96v1                       46 10 37 G62a6q96v1 0.004995005 0.01234060
## yc3hB2f247                       47  9 38 yc3hB2f247 0.003996004 0.01234060
## tn95dVvyfq                       48 11 37 tn95dVvyfq 0.012987013 0.01585624
## i34257K5o6                       49 10 39 i34257K5o6 0.004995005 0.01234060
## r76lKaa359                       49  9 40 r76lKaa359 0.004995005 0.01234060
## f59k8Ac558                       50 10 41 f59k8Ac558 0.001998002 0.01234060
## v26h929uK8                       52 11 41 v26h929uK8 0.006993007 0.01234060
## N8hwg5979i                       52 12 40 N8hwg5979i 0.004995005 0.01234060
## gc7955R7k8                       52 11 41 gc7955R7k8 0.002997003 0.01234060
## nJ334736dc                       53 12 41 nJ334736dc 0.007992008 0.01234060
## e1n58k41tM                       54  7 47 e1n58k41tM 0.005994006 0.01234060
## niz32f6icK                       54 11 47 niz32f6icK 0.007992008 0.01234060
## Rbu59l2m2e                       55 13 43 Rbu59l2m2e 0.010989011 0.01424501
## hli575Zm3a                       60 12 49 hli575Zm3a 0.008991009 0.01329656
## j79z6Cf128                       60 11 54 j79z6Cf128 0.008991009 0.01329656
## M19lz3o828                       62 16 46 M19lz3o828 0.016983017 0.01981352
## eij9G9b2l2                       63 15 48 eij9G9b2l2 0.012987013 0.01585624
## Zh77t5831g                       63  9 54 Zh77t5831g 0.005994006 0.01234060
## Rw77rshc7y                       67  8 59 Rw77rshc7y 0.006993007 0.01234060
## q7u25p4rM7                       74  7 67 q7u25p4rM7 0.002997003 0.01234060
## tqH9bkdr78                       82 11 71 tqH9bkdr78 0.005994006 0.01234060
## Xa376ga9a3                       85 12 73 Xa376ga9a3 0.007992008 0.01234060
##            Fingerprintable
## a8681Py534              No
## hvu5113B2r              No
## j51h15E4xv              No
## nxaQkud83t              No
## Wj74c75671             Yes
## T1358ru144             Yes
## b9u81qz6G5              No
## b57665B367             Yes
## P9a579it51              No
## M3go34rg95             Yes
## Yjkv6qz9d1             Yes
## I89n948112             Yes
## o77M6w8m26             Yes
## b5F118a654             Yes
## b1l2a37p3Z             Yes
## aadbq388Ob             Yes
## ytbukz3wX4             Yes
## tu2b9S1ms9             Yes
## u17w5462Em             Yes
## Fljc85x4j4             Yes
## okj19fo9Fh             Yes
## ewv9511u6J             Yes
## x5Ny88741k             Yes
## vhs2B895gm             Yes
## I28284j41f             Yes
## ggr1delQ9y             Yes
## fgJ6xw7668             Yes
## x75646ye9T             Yes
## wG7615jxji             Yes
## y9s1dvTq2q             Yes
## neCz5h3c81             Yes
## c2i1N145o5             Yes
## m3837o9wA8             Yes
## z4z93Q7hs2             Yes
## z4wsu3Z4l2             Yes
## hy2wC38x85             Yes
## xj3R2c13ha             Yes
## ox17xwBqy6             Yes
## j6r171siTa             Yes
## Zd122o2d96             Yes
## p15y46v54P             Yes
## uw968g1F9o             Yes
## s145O1udx1             Yes
## gJuiz877gn             Yes
## S91d18oh3x             Yes
## x1J5788214             Yes
## tx43oiW5ps             Yes
## j1218Kcj5g             Yes
## ax5I99lf78             Yes
## Opb6k5eke1             Yes
## b83u91R6q5             Yes
## dKfyt77oq4             Yes
## jmHk8186fv             Yes
## e813o11n7T             Yes
## M97oq2u9z2             Yes
## p6o2qe1xQt             Yes
## kju391wN3s             Yes
## yzfoH84397             Yes
## jT2r93k9m8             Yes
## uxxiod4U24             Yes
## k7ifvzH4yx             Yes
## tb51gQ2f32             Yes
## qixzE71638             Yes
## b1Oe7158q9             Yes
## Yoea9j7gg5             Yes
## wUe1ha8l71             Yes
## jbypJkj3a8             Yes
## wfvbs123Wn             Yes
## c192S19qf7             Yes
## aSzdcei682             Yes
## q241I76nd4             Yes
## hB3nx98zae             Yes
## a34K18e29d             Yes
## ddf1bWbguf             Yes
## Z8m99686m1             Yes
## Valj2s14g8             Yes
## yr8lL9pd54             Yes
## P736124up6             Yes
## wc8b25ckoY             Yes
## y779g7Rjjm             Yes
## u44Dr4gs86             Yes
## nI83m4382t             Yes
## y7g3x8266T             Yes
## G62a6q96v1             Yes
## yc3hB2f247             Yes
## tn95dVvyfq             Yes
## i34257K5o6             Yes
## r76lKaa359             Yes
## f59k8Ac558             Yes
## v26h929uK8             Yes
## N8hwg5979i             Yes
## gc7955R7k8             Yes
## nJ334736dc             Yes
## e1n58k41tM             Yes
## niz32f6icK             Yes
## Rbu59l2m2e             Yes
## hli575Zm3a             Yes
## j79z6Cf128             Yes
## M19lz3o828             Yes
## eij9G9b2l2             Yes
## Zh77t5831g             Yes
## Rw77rshc7y             Yes
## q7u25p4rM7             Yes
## tqH9bkdr78             Yes
## Xa376ga9a3             Yes
# Relevel subject IDs so observed (fp_res) and permuted (fp_perm_res) data
# frames share the same ordering (fewest -> most fingerprintable stimuli).
fp_res$subids = factor(fp_res$subids, levels = ordered_fp_res_subs)
fp_perm_res$subids = factor(rownames(fp_res), levels = ordered_fp_res_subs)

# long format for ggplot
perm_long = melt(fp_perm_res, id.vars = c("subids","Fingerprintable"))

# plot aesthetics
star_size = 7
star_color = "blue"
axis_font_size = 10

# Null distributions as ridges per subject, with each subject's observed
# count overlaid as a blue star.
p = ggplot(perm_long, aes(x = value, y = subids, fill = Fingerprintable)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  xlab("# of Fingerprintable Stimuli") + ggtitle("# Fingerprintable Stimuli") +
  geom_text(data = fp_res, aes(x = nfingerprintable_stimuli, y = subids),
            color = star_color, label = "*", size = star_size) +
  guides(color = FALSE, alpha = FALSE) +
  ggtitle(plot_title_name) + easy_center_title() +
  theme(axis.text.x = element_text(size = axis_font_size),
        axis.text.y = element_text(size = axis_font_size),
        axis.title.x = element_text(size = axis_font_size),
        strip.text.x = element_text(size = axis_font_size),
        axis.title.y = element_text(size = axis_font_size),
        plot.title = element_text(hjust = 0.5))
ggsave(p, filename = file.path(plot_path, "number_fingerprintable_stimuli_per_subject.pdf"),
       width = 6, height = 7)
p

# make ggridges plot for gaze uniqueness index (GUI; aka fingerprint ratio)
# test each subject for whether GUI is greater than the null value of 0 after
# log10 transformation (because GUI is positively skewed). This identifies
# which subjects have an overall GUI across all stimuli greater than 0.
# Exclude subjects missing intra-subject similarity on >15% of stimuli.
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)
tmp_gui = fpratio_sub_by_stim %>% filter(!na_mask)
tmp_gui = t(tmp_gui) # transpose -> one column per subject

# one-sample t-test per subject on log10(GUI) against mu = 0
gui_res_sub = data.frame(matrix(nrow = dim(tmp_gui)[2], ncol = 4))
colnames(gui_res_sub) = c("subid","tstat","pval","fdr")
rownames(gui_res_sub) = colnames(tmp_gui)
subids2use = colnames(tmp_gui)
for (sub in subids2use){
  gui_res_sub[sub, "subid"] = sub
  tres = t.test(log10(tmp_gui[,sub]), mu = 0)
  gui_res_sub[sub, "tstat"] = tres$statistic[[1]]
  gui_res_sub[sub, "pval"] = tres$p.value[[1]]
}
gui_res_sub$fdr = p.adjust(gui_res_sub$pval, method = "fdr")
gui_res_sub$Unique = "Yes"
# anything with tstat<0 or fdr above threshold is not unique
gui_res_sub$Unique[gui_res_sub$tstat<0 | gui_res_sub$fdr>fdr_thresh] = "No"
gui_res_sub = gui_res_sub[order(gui_res_sub$tstat),]
gui_res_sub$subid = factor(gui_res_sub$subid, levels = gui_res_sub$subid)
table(gui_res_sub$Unique)
## 
##  No Yes 
##  21  84
# make gaze uniqueness ranking based on strength of tstat (rank 1 = strongest)
gui_res_sub = gui_res_sub[order(-gui_res_sub$tstat),]
gui_res_sub$uniqueness_rank = seq_len(dim(gui_res_sub)[1])

# per-subject GUI values, ordered by uniqueness, in long format for ridges
gui_res = fpratio_sub_by_stim %>% filter(!na_mask)
gui_res$subid = rownames(gui_res)
gui_res$subid = factor(gui_res$subid, levels = rev(gui_res_sub$subid))
gui_res = merge(gui_res, gui_res_sub[,c("subid","Unique")])
melted_gui_res = melt(gui_res, id.vars = c("subid","Unique"))

# make ridges plot (vertical line marks the GUI null value of 1)
p = ggplot(melted_gui_res, aes(x = value, y = subid, fill=Unique)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + xlim(-1,7) + geom_vline(xintercept = 1) + 
  xlab("Gaze Uniqueness Index (GUI)") + ggtitle(plot_title_name) + easy_center_title()
ggsave(p,filename = file.path(plot_path, "gaze_uniqueness_index_per_subject.pdf"),
       width=6,height=7)
p

# subjects to remove because of too many NAs (>15% of stimuli missing)
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(nstimuli*0.15)

# compute jaccard distance matrix which is percentage of overlap in fingerprintable stimuli
jaccard_mat = as.matrix(dist(as.matrix(classifier_output_sub_by_stim), method = "binary"))
# Use logical masking instead of -which(): -which(na_mask) yields integer(0)
# when na_mask has no TRUE entries, and x[integer(0),] would drop ALL rows.
jaccard_mat = jaccard_mat[!na_mask, !na_mask]
# NOTE(review): clust_method is assigned further down in this script; this
# assumes it is already in scope from an earlier chunk/run - confirm ordering
heatmap(as.matrix(jaccard_mat), 
        hclustfun=function(d) hclust(d, method=clust_method),
        scale = "none")

# plot heatmap of classifier output matrix of subjects by stimuli matrix
tmp_data = classifier_output_sub_by_stim[!na_mask,]
heatmap(as.matrix(tmp_data), 
        hclustfun=function(d) hclust(d, method=clust_method),
        scale = "none")

# data-driven estimate of the number of clusters (silhouette criterion)
nb_clust_res = NbClust(data = as.matrix(tmp_data),index = "silhouette", distance = "binary",method = "ward.D2", min.nc=1,max.nc=dim(tmp_data)[1]-1)
nb_clust_res$Best.nc
## Number_clusters     Value_Index 
##        104.0000          0.9817

Which semantic features are enriched among fingerprintable stimuli?

And, can we cluster people by these semantic feature profiles of fingerprintable stimuli?

df_sem_feat = data.frame(df_sem_feat)
sem_feat_names = colnames(df_sem_feat)

# Per subject, summarize the fingerprint ratio (GUI) within each semantic
# feature category, across all stimuli ("all"), and within the two stimulus
# clusters from cluster_res ("c1", "c2"). The summary statistic is the
# MEDIAN (robust to the positive skew of GUI), despite the "mean" in the
# result object's name.
nsubs = dim(fpratio_sub_by_stim)[1]
mean_fpr_per_sem_feat_res = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
rownames(mean_fpr_per_sem_feat_res) = rownames(fpratio_sub_by_stim)
colnames(mean_fpr_per_sem_feat_res) = sem_feat_names
mean_fpr_per_sem_feat_res$all = NA
for (isub in seq_len(nsubs)){
  for (isf in seq_along(sem_feat_names)){
    # median fingerprint ratio over stimuli tagged with this semantic feature
    mean_fpr_per_sem_feat_res[isub, isf] = median(as.numeric(fpratio_sub_by_stim[isub,df_sem_feat[,isf]==1]), na.rm = TRUE)
  } # for (isf in seq_along(sem_feat_names)){
  mean_fpr_per_sem_feat_res[isub, "all"] = median(as.numeric(fpratio_sub_by_stim[isub,]), na.rm = TRUE)
  mean_fpr_per_sem_feat_res[isub, "c1"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==1]), na.rm = TRUE)
  mean_fpr_per_sem_feat_res[isub, "c2"] = median(as.numeric(fpratio_sub_by_stim[isub,cluster_res[,"clust_labels"]==2]), na.rm = TRUE)
} # for (isub in seq_len(nsubs)){

df_res = classifier_output_sub_by_stim

# remove subs that don't have statistically significant nfingerprintable stimuli
df_res = df_res[fp_subs2include,]

nsubs = dim(df_res)[1]

# background set size for the enrichment test = all stimuli
nTotal = nstimuli

# odds ratios per subject x semantic feature
enrich_res_OR = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_OR) = sem_feat_names
rownames(enrich_res_OR) = rownames(df_res)

# p-values per subject x semantic feature
enrich_res_P = data.frame(matrix(nrow = nsubs, ncol = length(sem_feat_names)))
colnames(enrich_res_P) = sem_feat_names
rownames(enrich_res_P) = rownames(df_res)

# number of fingerprintable stimuli per subject
nfp_res = data.frame(matrix(nrow = nsubs, ncol=1))
rownames(nfp_res) = rownames(df_res)
colnames(nfp_res) = c("nFP")

# For each subject, test whether their fingerprintable stimuli are enriched
# for each semantic feature. enrichmentTest comes from utils.R; presumably
# a hypergeometric/Fisher-type overlap test - confirm in utils.R.
for (isub in 1:nsubs){
  
  # subject's fingerprintable stimulus count and mask (1 = fingerprintable)
  nFP = sum(df_res[isub,])
  nfp_res[isub,1] = nFP
  fp_mask = df_res[isub,]==1
  
  for (isf in 1:length(sem_feat_names)){
    semfeat2use = sem_feat_names[isf]
    # stimuli tagged with this feature, and the overlap with fingerprintable ones
    nSemantic = sum(df_sem_feat[,semfeat2use])
    nFP_given_Semantic = sum(df_sem_feat[fp_mask,semfeat2use])
    tmp_enrich_res = enrichmentTest(nFP_given_Semantic, nFP, nSemantic, nTotal)
    enrich_res_OR[isub,semfeat2use] = tmp_enrich_res$OR
    enrich_res_P[isub,semfeat2use] = tmp_enrich_res$p
    
  } # for (isf in 1:length(sem_feat_names)){
  
} # for (isub in 1:nsubs){

# Hierarchical clustering setup (Ward linkage on squared distances)
clust_method = "ward.D2"
# Cluster SUBJECTS (rows) on their semantic enrichment OR profiles.
# First poll many NbClust indices for a consensus k ...
nbclust_index = c("kl","ch","ccc","cindex","db","silhouette","duda","pseudot2",
                  "ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
                  "sdindex")
res = NbClust(data = enrich_res_OR, method = clust_method, index = nbclust_index)
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
# modal best k across indices
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# NOTE(review): the final row partition uses the "ch" index only; best_nk
# above is computed but not used here - confirm this is intentional
res = NbClust(data = enrich_res_OR, method = clust_method, index = "ch")
row_subtype = res$Best.partition
row_cols = brewer.pal(9, "Set1")[res$Best.partition]

# ... then cluster FEATURES (columns) the same way on the transposed matrix
nbclust_index = c("kl","ch","cindex","db","silhouette","duda","pseudot2",
                  "ratkowsky","ptbiserial","gap","mcclain","gamma","gplus","tau",
                  "sdindex")
res = NbClust(data = t(enrich_res_OR), method = clust_method, index=nbclust_index)
a = data.frame(res$Best.nc[1,]); colnames(a) = "k"; a$index = rownames(a)
best_nk = as.numeric(names(table(a$k))[table(a$k)==max(table(a$k))])
# final column partition uses the "cindex" criterion
res = NbClust(data = t(enrich_res_OR), method = clust_method, index="cindex")
col_cols = brewer.pal(9, "Set1")[res$Best.partition]
feature_clusters = data.frame(feature = colnames(enrich_res_OR), cluster = col_cols)

# Remap cluster colors onto the ggplot default hue palette so heatmap side
# colors match the other figures
new_row_cols = get_ggColorHue(6)
tmp_row_cols = unique(row_cols)
row_cols[row_cols==tmp_row_cols[1]] = new_row_cols[3]
row_cols[row_cols==tmp_row_cols[2]] = new_row_cols[4]

tmp_col_cols = unique(col_cols)
col_cols[col_cols==tmp_col_cols[1]] = new_row_cols[5]
col_cols[col_cols==tmp_col_cols[2]] = new_row_cols[6]

# Heatmap of subject x feature ORs with subtype (row) and feature-cluster
# (column) side color bars
heatmap(as.matrix(enrich_res_OR), 
        hclustfun=function(d) hclust(d, method=clust_method),
        scale = "none",
        RowSideColors=row_cols, 
        ColSideColors=col_cols)

# plot this so you can screenshot the color scale 
ggheatmap(as.matrix(enrich_res_OR), color=colorRampPalette(hcl.colors(12, "YlOrRd", rev = TRUE))(100), scale="none",legendName="OR")

# subject -> subtype assignment taken from the row clustering above
subtype_df = data.frame(subid = rownames(enrich_res_OR), subtype = row_subtype)
table(subtype_df$subtype)
## 
##  1  2 
## 64 35
enrich_res_OR$subid = rownames(enrich_res_OR)

# attach subtypes to the OR profiles; long format for plotting/statistics
df2plot = merge(enrich_res_OR, subtype_df, by = "subid")
rownames(df2plot) = df2plot$subid
df2plot$subtype = factor(df2plot$subtype)
df4plot = melt(df2plot, id.vars = c("subid","subtype"))

# Per-subtype, per-feature one-sample t-tests of OR against the null value 1
h_test_res = list()
res_colnames = c("feature","t","p","d")

# preallocate one results data frame per subtype (named S1, S2, ...)
for (subtype in unique(row_subtype)){
  subtype_name = sprintf("S%d",subtype)
  h_test_res[[subtype_name]] = data.frame(matrix(
    nrow = length(sem_feat_names), 
    ncol = length(res_colnames)))
  rownames(h_test_res[[subtype_name]]) = sem_feat_names
  colnames(h_test_res[[subtype_name]]) = res_colnames
}

# run the tests; d is a one-sample Cohen's d, (mean - 1) / sd
for (subtype in unique(row_subtype)){
  subtype_name = sprintf("S%d",subtype)
  for (sem_feat in sem_feat_names){
      h_test_res[[subtype_name]][sem_feat, "feature"] = sem_feat
      
      t_res = t.test(df2plot[df2plot$subtype==subtype,sem_feat], mu=1)
      h_test_res[[subtype_name]][sem_feat, "t"] = t_res$statistic
      h_test_res[[subtype_name]][sem_feat, "p"] = t_res$p.value
      
      tmp_mu = mean(df2plot[df2plot$subtype==subtype,sem_feat], na.rm = TRUE)
      tmp_sd = sd(df2plot[df2plot$subtype==subtype,sem_feat], na.rm = TRUE)
      h_test_res[[subtype_name]][sem_feat, "d"] = (tmp_mu-1)/tmp_sd
  }
  # FDR-correct across features within each subtype
  h_test_res[[subtype_name]]$fdr = p.adjust(h_test_res[[subtype_name]]$p, method = "fdr")
}

# check if subtypes differ by sex
a = merge(pheno_data,df2plot[,c("subid","subtype")], by.x = "subj_ID",by.y = "subid")
chisq.test(table(a$sex, a$subtype))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(a$sex, a$subtype)
## X-squared = 0.049762, df = 1, p-value = 0.8235
# NOTE(review): reorder_vect is computed but never used below; the feature
# ordering is hard-coded instead - confirm the two agree
reorder_vect = order(h_test_res[["S2"]][,"t"])
# features ordered (hard-coded) by the S2 t-statistic
ordered_sem_feat_by_t = c("gazed","touched","operability","touch","animal",
                          "sound","smell","nonsocial","taste","text","watchability",
                          "social","face","human","emotion","motion")
# print each subtype's test results in that feature order
for (subtype in unique(row_subtype)){
  subtype_name = sprintf("S%d",subtype)
  print(subtype_name)
  print(h_test_res[[subtype_name]][ordered_sem_feat_by_t,])
}
## [1] "S1"
##                   feature          t            p           d         fdr
## gazed               gazed -0.6901391 0.4926413423 -0.08626739 0.606327806
## touched           touched -0.0157684 0.9874689880 -0.00197105 0.987468988
## operability   operability  1.7295878 0.0886010506  0.21619847 0.202516687
## touch               touch  0.1247483 0.9011200362  0.01559354 0.961194705
## animal             animal  0.4900551 0.6257972088  0.06125689 0.715196810
## sound               sound  1.3624726 0.1778998357  0.17030908 0.355799671
## smell               smell  2.8522034 0.0058653541  0.35652543 0.031281889
## nonsocial       nonsocial  2.2086022 0.0308482332  0.27607527 0.082261955
## taste               taste  3.4190864 0.0011067413  0.42738580 0.008853930
## text                 text  2.5570253 0.0129779816  0.31962817 0.051911926
## watchability watchability  2.4290659 0.0180022380  0.30363324 0.057607162
## social             social -1.0989039 0.2759918479 -0.13736299 0.401442688
## face                 face -1.2942636 0.2002986308 -0.16178295 0.356065840
## human               human -0.7078459 0.4816515429 -0.08848074 0.606327806
## emotion           emotion  1.2319635 0.2225411500  0.15399544 0.356065840
## motion             motion -4.0487783 0.0001433886 -0.50609729 0.002294218
## [1] "S2"
##                   feature           t            p          d          fdr
## gazed               gazed   3.9701013 3.527683e-04  0.6710696 8.063277e-04
## touched           touched   4.9799653 1.823580e-05  0.8417678 4.862881e-05
## operability   operability  -3.6160523 9.582779e-04 -0.6112244 1.533245e-03
## touch               touch  -3.3993653 1.739519e-03 -0.5745976 2.530209e-03
## animal             animal   1.1630151 2.529213e-01  0.1965854 2.697827e-01
## sound               sound   1.2744010 2.111630e-01  0.2154131 2.413292e-01
## smell               smell  -5.9557618 9.829425e-07 -1.0067075 3.145416e-06
## nonsocial       nonsocial -11.7800774 1.499182e-13 -1.9911965 2.398692e-12
## taste               taste  -3.3139543 2.192178e-03 -0.5601605 2.922904e-03
## text                 text   1.8480581 7.330821e-02  0.3123788 9.022549e-02
## watchability watchability   0.8382835 4.077263e-01  0.1416958 4.077263e-01
## social             social   8.2258883 1.345073e-09  1.3904289 7.173721e-09
## face                 face   6.2060659 4.657501e-07  1.0490166 1.863001e-06
## human               human   8.3173376 1.042847e-09  1.4058867 7.173721e-09
## emotion           emotion   3.6212376 9.445559e-04  0.6121009 1.533245e-03
## motion             motion   3.6301242 9.214799e-04  0.6136030 1.533245e-03
# Relevel features for plotting and rename melted columns to meaningful names
df4plot$variable = factor(df4plot$variable, levels = ordered_sem_feat_by_t)
colnames(df4plot)[colnames(df4plot)=="variable"] = "feature"
colnames(df4plot)[colnames(df4plot)=="value"] = "OR"
feature_clusters$feature = factor(feature_clusters$feature, levels = levels(df4plot$feature))
# Vectorized lookup of each feature's cluster label (replaces the original
# per-feature loop; match() gives identical values, NA for unmatched features)
df4plot$feature_cluster = feature_clusters$cluster[match(df4plot$feature, feature_clusters$feature)]
df4plot$feature_cluster = factor(df4plot$feature_cluster)
fingerprint_profiles = df4plot

# Per-subtype OR profiles across semantic features, colored by feature cluster
p = ggplot(data = df4plot, aes(x = feature, y = OR, colour=feature_cluster)) + 
  facet_grid(subtype ~ .) + 
  geom_scatterbox() + 
  geom_hline(yintercept=1) + 
  xlab("Semantic Features") + 
  ylab("Odds Ratio") +
  scale_colour_manual(values = c("#F564E3","#619CFF")) +
  guides(colour=FALSE) +
  coord_flip() + ggtitle(plot_title_name) + easy_center_title()
# pass the plot explicitly rather than relying on ggsave's last_plot() default
ggsave(p, filename = file.path(plot_path, "fingerprintable_stimuli_semantic_enrichment.pdf"),
       width = 4, height = 5)
p

Do gaze fingerprint subtypes or number of fingerprintable stimuli correlate with autistic traits?

# harmonize the subject ID column name with the rest of the pipeline
colnames(pheno_data)[colnames(pheno_data)=="subj_ID"] = "subid"

# right_join keeps all pheno subjects; those without a gaze subtype get NA
df2use = right_join(df2plot, pheno_data, by="subid")
# subtype2 label: "NF" unless the subject belongs to a fingerprint subtype
df2use$subtype2 = "NF"
unique_subtypes = levels(df2use$subtype)
for (subtype in unique_subtypes){
  # NA subtype rows are skipped by this length-1 logical assignment
  df2use$subtype2[df2use$subtype==subtype] = sprintf("S%s",subtype)
}
df2use = merge(df2use, fp_res, by.x = "subid", by.y = "subids")

# subjects missing >15% of stimuli are excluded from trait analyses
na_mask = rowSums(is.na(intrasubR_sub_by_stim))>(700*0.15)

mean_fpr_per_sem_feat_res = mean_fpr_per_sem_feat_res %>% filter(!na_mask) 
mean_fpr_per_sem_feat_res$subid = rownames(mean_fpr_per_sem_feat_res)
new_df = merge(mean_fpr_per_sem_feat_res, pheno_data, by = "subid")
# PC1 of z-scored AQ and SRS totals = one general autistic-traits dimension
pca_res = princomp(scale(new_df[,c("AQ_Tot","SRS_Tot_Raw")]))
summary(pca_res)
## Importance of components:
##                           Comp.1    Comp.2
## Standard deviation     1.3269348 0.4692510
## Proportion of Variance 0.8888431 0.1111569
## Cumulative Proportion  0.8888431 1.0000000
new_df$at_pc1 = pca_res$scores[,1]

# attach GUI statistics and fingerprintable-stimulus counts per subject
new_df = merge(new_df, gui_res_sub, by = "subid")
new_df = merge(new_df, df2use[,c("subid","nfingerprintable_stimuli")])

# Median intra-subject similarity per subject
intra_df = data.frame(
  med_intrasub = rowMedians(as.matrix(intrasubR_sub_by_stim), na.rm = TRUE),
  subid = rownames(intrasubR_sub_by_stim)) %>%
  filter(!na_mask)
new_df = merge(new_df, intra_df, by = "subid")

# Median of the average inter-subject similarity per subject
inter_df = data.frame(
  med_intersub = rowMedians(as.matrix(mean_intersubR_sub_by_stim), na.rm = TRUE),
  subid = rownames(mean_intersubR_sub_by_stim)) %>%
  filter(!na_mask)
new_df = merge(new_df, inter_df, by = "subid")

# Global GUI = median intra-subject / median average inter-subject similarity
new_df$global_gui = new_df$med_intrasub / new_df$med_intersub

# PCA gaze fingerprinting metrics: PC1/PC2 of the z-scored median GUI
# ("all") and number of fingerprintable stimuli
pca_res = princomp(scale(new_df[,c("all","nfingerprintable_stimuli")]))
summary(pca_res)
## Importance of components:
##                           Comp.1     Comp.2
## Standard deviation     1.3360906 0.44250898
## Proportion of Variance 0.9011515 0.09884851
## Cumulative Proportion  0.9011515 1.00000000
new_df$fp_pc1 = pca_res$scores[,1]
new_df$fp_pc2 = pca_res$scores[,2]

# define who is an autistic traits outlier by either SRS or AQ
new_df$AQ_outlier = "No"
new_df$AQ_outlier[new_df$AQ_Tot>26] = "Yes" # Woodbury-Smith et al., 2005 JADD cut-off
new_df$SRS_outlier = "No"
new_df$SRS_outlier[new_df$SRS_Tot_Raw>80.81] = "Yes" # 2 SD cut-off for TD Italian adults from SRS-2 manual
# logical flag: outlier on either instrument
new_df$AT_outlier = new_df$SRS_outlier=="Yes" | new_df$AQ_outlier=="Yes" 
  
# test correlation between AQ and SRS (sanity check that the two autistic
# trait instruments measure a shared dimension)
cor.test(new_df$AQ_Tot, new_df$SRS_Tot_Raw)
## 
##  Pearson's product-moment correlation
## 
## data:  new_df$AQ_Tot and new_df$SRS_Tot_Raw
## t = 12.555, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6886711 0.8436072
## sample estimates:
##       cor 
## 0.7776862
# Scatter of AQ vs SRS totals, colouring autistic-trait outliers,
# with a robust (lmrob) regression line
p = ggplot(new_df, aes(x = AQ_Tot, y = SRS_Tot_Raw)) +
  geom_point(aes(colour = AT_outlier)) +
  geom_smooth(method = lmrob, colour = "black") +
  guides(colour = FALSE) +
  xlab("AQ Total") + ylab("SRS Total")
ggsave(p, filename = file.path(plot_path, "AQ_by_SRS.pdf"))
p

# test correlation between number of fingerprintable stimuli and median GUI
# ("all" = median GUI across all stimuli, computed earlier)
cor.test(new_df$nfingerprintable_stimuli, new_df$all)
## 
##  Pearson's product-moment correlation
## 
## data:  new_df$nfingerprintable_stimuli and new_df$all
## t = 13.641, df = 103, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7216009 0.8614933
## sample estimates:
##      cor 
## 0.802303
# Fingerprintable-stimuli count vs median GUI, with a robust fit line
p = ggplot(new_df, aes(x = nfingerprintable_stimuli, y = all)) +
  geom_point() +
  geom_smooth(method = lmrob, colour = "black") +
  guides(colour = FALSE) +
  xlab("Fingerprintable Stimuli") + ylab("Median Gaze Uniqueness Index (GUI)")
ggsave(p, filename = file.path(plot_path, "nfingerprintable_stimuli_by_medianGUI.pdf"))
p

# run linear model with sex and FP PC1 and PC2 predicting autistic traits PC1
# (lmrob = robust linear regression from robustbase)
form2use = as.formula(sprintf("at_pc1 ~ sex + %s + %s","fp_pc1","fp_pc2"))
mod2use = lmrob(data = new_df, formula = form2use)
summary(mod2use)$coefficients
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) -0.3817752 0.16382788 -2.330343 2.177677e-02
## sexM         0.4328317 0.20074730  2.156102 3.344986e-02
## fp_pc1      -0.3578814 0.06787054 -5.273000 7.658472e-07
## fp_pc2      -0.3224181 0.20831048 -1.547777 1.248038e-01
summary(mod2use)$r.squared
## [1] 0.2329668
summary(mod2use)$adj.r.squared
## [1] 0.2101836
# model comparisons - gaze fingerprinting PC1, median intra-sub, median inter-sub
# Each model predicts autistic traits PC1 from sex plus one gaze metric;
# R^2 values are collected for a bar-chart comparison below.
model_variances = data.frame(matrix(nrow = 3, ncol = 2))
rownames(model_variances) = c("Gaze Fingerprinting PC1","Median Intra-Subject Similarity","Median Inter-Subject Similarity")
colnames(model_variances) = c("Model","Percentage Variance Explained")
# run linear model with sex and FP PC1 predicting autistic traits PC1
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","fp_pc1"))
model1 = lmrob(data = new_df, formula = form2use)
summary(model1)$coefficients
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) -0.4113238  0.1679866 -2.448552 1.604928e-02
## sexM         0.4834853  0.2021414  2.391817 1.859694e-02
## fp_pc1      -0.3556895  0.0696950 -5.103515 1.549848e-06
summary(model1)$r.squared
## [1] 0.212732
model_variances["Gaze Fingerprinting PC1","Model"] = "Gaze Fingerprinting PC1"
model_variances["Gaze Fingerprinting PC1","Percentage Variance Explained"] = summary(model1)$r.squared
summary(model1)$adj.r.squared
## [1] 0.1972954
# run linear model with sex and median intrasub-similarity
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intrasub"))
model2 = lmrob(data = new_df, formula = form2use)
summary(model2)$coefficients
##                Estimate Std. Error   t value    Pr(>|t|)
## (Intercept)   1.7593069  0.8281129  2.124477 0.036047008
## sexM          0.4429568  0.2136416  2.073364 0.040658312
## med_intrasub -3.8299968  1.4477434 -2.645494 0.009448267
summary(model2)$r.squared
## [1] 0.1167606
model_variances["Median Intra-Subject Similarity","Model"] = "Median Intra-Subject Similarity"
model_variances["Median Intra-Subject Similarity","Percentage Variance Explained"] = summary(model2)$r.squared
summary(model2)$adj.r.squared
## [1] 0.09944221
# run linear model with sex and median avg intersub-similarity
form2use = as.formula(sprintf("at_pc1 ~ sex + %s","med_intersub"))
model3 = lmrob(data = new_df, formula = form2use)
summary(model3)$coefficients
##                 Estimate Std. Error     t value   Pr(>|t|)
## (Intercept)  -0.30722743   1.306156 -0.23521494 0.81451349
## sexM          0.38589817   0.223247  1.72857046 0.08691185
## med_intersub -0.09931571   2.951955 -0.03364405 0.97322674
summary(model3)$r.squared
## [1] 0.02961302
model_variances["Median Inter-Subject Similarity","Model"] = "Median Inter-Subject Similarity"
model_variances["Median Inter-Subject Similarity","Percentage Variance Explained"] = summary(model3)$r.squared
summary(model3)$adj.r.squared
## [1] 0.01058582
# Autistic traits PC1 as a function of gaze fingerprinting PC1,
# highlighting autistic-trait outliers; robust regression line
feature2use = "fp_pc1"
p = ggplot(new_df, aes(x = .data[[feature2use]], y = at_pc1)) +
  geom_point(aes(colour = AT_outlier)) +
  geom_smooth(method = lmrob, colour = "black") +
  guides(colour = FALSE) +
  ylab("Autistic Traits PC1") + xlab("Gaze Fingerprinting PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_PC1_by_fingerprinting_PC1.pdf"))
p

# Bar chart comparing variance explained in autistic traits PC1 across the
# three competing models (ordered best -> worst, top -> bottom)
model_variances$Model = factor(
  model_variances$Model,
  levels = rev(c("Gaze Fingerprinting PC1",
                 "Median Intra-Subject Similarity",
                 "Median Inter-Subject Similarity")))
x_var = "Percentage Variance Explained"
y_var = "Model"
p = ggplot(model_variances, aes(x = .data[[x_var]], y = .data[[y_var]])) +
  geom_bar(stat = "identity") +
  xlab("Percentage Variance Explained in Autistic Traits PC1")
ggsave(p, filename = file.path(plot_path, "autistic_traits_model_comparisons.pdf"))
p